Data Analysis with Python in Jupyter Notebook using Plotly¶

In [1]:
# Libraries needed to run the code
import pandas as pd
import sqlalchemy as sa
import plotly.express as px
import plotly.graph_objects as go
from sqlalchemy.engine import create_engine
In [2]:
import pandas
dataframe = pandas.read_csv('/Users/diegogabrielvasquezgotopo/Desktop/Data Analyst Portfolio/SQL Database/Customer and Products/Orders.csv')
dataframe
Out[2]:
OrderID CreationDate TotalDue Status CustomerID SalespersonID
0 1000 5/14/16 0:00 118.22 paid 413 130
1 1001 7/31/16 0:00 211.22 returned 128 102
2 1002 7/9/16 0:00 139.81 past due 791 115
3 1003 4/4/16 0:00 323.71 paid 974 139
4 1004 2/16/16 0:00 214.02 paid 866 102
... ... ... ... ... ... ...
195 1195 5/18/16 0:00 30.43 paid 107 116
196 1196 5/19/16 0:00 143.06 paid 862 115
197 1197 2/29/16 0:00 233.08 paid 712 134
198 1198 12/25/15 0:00 206.12 cancelled 968 130
199 1199 5/5/16 0:00 55.35 paid 672 147

200 rows × 6 columns

In [3]:
pip install psycopg2
Requirement already satisfied: psycopg2 in /opt/anaconda3/lib/python3.13/site-packages (2.9.11)
Note: you may need to restart the kernel to use updated packages.

SQL CustomerandProducts Database Connection¶

In [3]:
import pandas as pd
from sqlalchemy import create_engine
username = 'postgres'      # or your username
password = 'Pnlgsveb6'  # your PostgreSQL password
host = 'localhost'         # or the server IP
port = '5432'              # default PostgreSQL port
database = 'CustomerandProducts'

engine = create_engine("postgresql+psycopg2://postgres:Pnlgsveb6@127.0.0.1:5432/CustomerandProducts")
In [4]:
import pandas as pd
pd.read_sql("SELECT table_name FROM information_schema.tables WHERE table_schema='public';", engine)
Out[4]:
table_name
0 customers_csv
1 v_orders
2 orders
3 product
4 salesperson
5 orderitem
6 customers
In [5]:
df_orders = pd.read_sql("SELECT * FROM orders LIMIT 10;", engine)
df_orders
Out[5]:
orderid creationdate totaldue status customerid salespersonid
0 1000 2016-05-14 118.22 paid 413 130
1 1001 2016-07-31 211.22 returned 128 102
2 1002 2016-07-09 139.81 past due 791 115
3 1003 2016-04-04 323.71 paid 974 139
4 1004 2016-02-16 214.02 paid 866 102
5 1005 2016-07-25 7.38 paid 798 118
6 1006 2016-07-16 21.48 paid 606 133
7 1007 2015-06-30 157.13 paid 265 111
8 1008 2016-06-19 60.86 paid 831 146
9 1009 2016-07-01 276.78 cancelled 789 108
In [6]:
# Example: read data from the Customers table
df = pd.read_sql('SELECT * FROM customers;', engine)

# Display the first few rows
print(df.head())
   customerid  firstname lastname         city state  zipcode
0         100      Carol     Shaw      Seattle    WA    98121
1         101  Elizabeth     Carr       Austin    TX    78732
2         102     Ernest    Ramos  Kansas City    MO    64199
3         103       Jane   Carter       Irving    TX    75037
4         104     Martha   Cooper        Tampa    FL    33625
In [7]:
df = pd.read_sql_query('Select * from V_orders', engine)
df.head()
Out[7]:
orderdate salesperson amount_of_orders total_due
0 2015-12-08 Edward Kelley 1 115.41
1 2016-06-20 Marie Hall 1 143.91
2 2016-03-12 Victor Moore 1 331.88
3 2015-10-29 Joan Ruiz 1 259.54
4 2016-05-01 Sara Simpson 1 250.92
In [8]:
import plotly.express as px
#Bar chart of sales by year
fig = px.bar(df, x='orderdate', y='total_due', title='Total Sales by Year')
fig.show()
In [ ]:
# SQL CustomerandProducts database connection
In [9]:
df['orderdate'] = pd.to_datetime(df['orderdate'])

#Create a 'year' column (as integer)
df['year'] = df['orderdate'].dt.year.astype(int)

#Group data by year and sum total sales
sales_by_year = df.groupby('year', as_index=False)['total_due'].sum()

# Create bar chart
fig = px.bar(
    sales_by_year,
    x='year',
    y='total_due',
    title='Total Sales by Year',
    labels={'year': 'Year', 'total_due': 'Total Sales ($)'},
    text_auto=True  # shows values on top of bars
)

fig.show()
In [35]:
!pip install dash
Collecting dash
  Downloading dash-3.2.0-py3-none-any.whl.metadata (10 kB)
Requirement already satisfied: Flask<3.2,>=1.0.4 in /opt/anaconda3/lib/python3.13/site-packages (from dash) (3.1.0)
Requirement already satisfied: Werkzeug<3.2 in /opt/anaconda3/lib/python3.13/site-packages (from dash) (3.1.3)
Requirement already satisfied: plotly>=5.0.0 in /opt/anaconda3/lib/python3.13/site-packages (from dash) (5.24.1)
Requirement already satisfied: importlib-metadata in /opt/anaconda3/lib/python3.13/site-packages (from dash) (8.5.0)
Requirement already satisfied: typing-extensions>=4.1.1 in /opt/anaconda3/lib/python3.13/site-packages (from dash) (4.12.2)
Requirement already satisfied: requests in /opt/anaconda3/lib/python3.13/site-packages (from dash) (2.32.3)
Collecting retrying (from dash)
  Downloading retrying-1.4.2-py3-none-any.whl.metadata (5.5 kB)
Requirement already satisfied: nest-asyncio in /opt/anaconda3/lib/python3.13/site-packages (from dash) (1.6.0)
Requirement already satisfied: setuptools in /opt/anaconda3/lib/python3.13/site-packages (from dash) (72.1.0)
Requirement already satisfied: Jinja2>=3.1.2 in /opt/anaconda3/lib/python3.13/site-packages (from Flask<3.2,>=1.0.4->dash) (3.1.6)
Requirement already satisfied: itsdangerous>=2.2 in /opt/anaconda3/lib/python3.13/site-packages (from Flask<3.2,>=1.0.4->dash) (2.2.0)
Requirement already satisfied: click>=8.1.3 in /opt/anaconda3/lib/python3.13/site-packages (from Flask<3.2,>=1.0.4->dash) (8.1.8)
Requirement already satisfied: blinker>=1.9 in /opt/anaconda3/lib/python3.13/site-packages (from Flask<3.2,>=1.0.4->dash) (1.9.0)
Requirement already satisfied: MarkupSafe>=2.1.1 in /opt/anaconda3/lib/python3.13/site-packages (from Werkzeug<3.2->dash) (3.0.2)
Requirement already satisfied: tenacity>=6.2.0 in /opt/anaconda3/lib/python3.13/site-packages (from plotly>=5.0.0->dash) (9.0.0)
Requirement already satisfied: packaging in /opt/anaconda3/lib/python3.13/site-packages (from plotly>=5.0.0->dash) (24.2)
Requirement already satisfied: zipp>=3.20 in /opt/anaconda3/lib/python3.13/site-packages (from importlib-metadata->dash) (3.21.0)
Requirement already satisfied: charset-normalizer<4,>=2 in /opt/anaconda3/lib/python3.13/site-packages (from requests->dash) (3.3.2)
Requirement already satisfied: idna<4,>=2.5 in /opt/anaconda3/lib/python3.13/site-packages (from requests->dash) (3.7)
Requirement already satisfied: urllib3<3,>=1.21.1 in /opt/anaconda3/lib/python3.13/site-packages (from requests->dash) (2.3.0)
Requirement already satisfied: certifi>=2017.4.17 in /opt/anaconda3/lib/python3.13/site-packages (from requests->dash) (2025.10.5)
Downloading dash-3.2.0-py3-none-any.whl (7.9 MB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 7.9/7.9 MB 49.0 MB/s eta 0:00:00
Downloading retrying-1.4.2-py3-none-any.whl (10 kB)
Installing collected packages: retrying, dash
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 2/2 [dash]━━━━━━ 1/2 [dash]
Successfully installed dash-3.2.0 retrying-1.4.2
In [10]:
#Web-based Dashboard using Dash
import dash
from dash import dcc
from dash import html
import plotly.express as px
import plotly.graph_objects as go
import pandas as pd

fig = go.Figure()

for index, row in df.iterrows():
    df_salesperson = df[df['salesperson']== row.salesperson]
    fig.add_trace(
        go.Scatter(x=list(df_salesperson.orderdate),
            y=list(df_salesperson['total_due']),
            name=row.salesperson,
            text=list(df['total_due'].round(1)),
            textposition="top left"))

salesperson_list = []
visibility = [False for i in range(len(df['salesperson']))]
for index, row in df.iterrows():
    visibility = [False for i in range(len(df['salesperson']))]
    visibility[index] = True
    d = dict(label=row.salesperson,
          method="update",
        args=[{"visible": visibility},
               {"title": "Order Summary",
                "annotations": []}])

    salesperson_list.append(d)
    

fig.update_layout(
    updatemenus=[
        dict(
            active=0,
            buttons=salesperson_list,
        )
    ])
app = dash.Dash()
app.layout = html.Div([
    dcc.Graph(figure=fig)
])

app.run(debug=True, use_reloader=False)

Total Sales by State¶

In [11]:
# 2️⃣ SQL JOIN: Combine Orders with Customers on customer_id
query = """
SELECT 
    c.state,
    o.totaldue
FROM orders o
JOIN customers c
ON o.customerid = c.customerid;
"""
df = pd.read_sql(query, engine)

# Group by state and sum total sales
sales_by_state = df.groupby('state', as_index=False)['totaldue'].sum()

# bar chart
fig = px.bar(
    sales_by_state,
    x='state',
    y='totaldue',
    title='💰 Total Sales by State',
    labels={'state': 'State', 'totaldue': 'Total Sales ($)'},
    text_auto=True,
    color='totaldue'
)

fig.show()
In [ ]:
 

Total sales by State (color shaded map)¶

In [12]:
# SQL JOIN: Orders + Customers to get each sale’s state
query = """
SELECT 
    c.state,
    o.totaldue
FROM orders o
JOIN customers c
ON o.customerid = c.customerid;
"""

# Load joined data into DataFrame
df = pd.read_sql(query, engine)

# Group by state and sum total sales
sales_by_state = df.groupby('state', as_index=False)['totaldue'].sum()

# Ensure state abbreviations are uppercase (e.g., 'CA', 'TX')
sales_by_state['state'] = sales_by_state['state'].str.upper()

# Create U.S. Choropleth Map
fig = px.choropleth(
    sales_by_state,
    locations='state',            # Column with state abbreviations
    locationmode='USA-states',    # Use two-letter state codes
    color='totaldue',            # Value to color by
    scope='usa',                  # Show only the U.S.
    color_continuous_scale='Blues',
    title='💰 Total Sales by U.S. State'
)

# Show the map
fig.show()
In [13]:
import dash
from dash import dcc, html, Input, Output
import plotly.express as px
import pandas as pd
from sqlalchemy import create_engine

# 1️⃣ Connect to your PostgreSQL database
engine = create_engine("postgresql+psycopg2://postgres:YOUR_PASSWORD@localhost:5432/CustomerandProducts")

# 2️⃣ Load data (Orders + Customers)
query = """
SELECT 
    o.creationdate,
    o.totaldue,
    o.orderid,
    c.state
FROM orders o
JOIN customers c
ON o.customerid = c.customerid;
"""
df = pd.read_sql(query, engine)

# 3️⃣ Prepare and clean data
df['creationdate'] = pd.to_datetime(df['creationdate'])
df['year'] = df['creationdate'].dt.year
df['state'] = df['state'].str.upper()

# 4️⃣ Initialize Dash app
app = dash.Dash(__name__)
app.title = "Sales by State Dashboard"

# 5️⃣ Layout with dropdowns and graph
app.layout = html.Div([
    html.H1("💰 Total Sales by U.S. State", style={'textAlign': 'center'}),

    html.Div([
        html.Label("Select Year:"),
        dcc.Dropdown(
            id='year_dropdown',
            options=[{'label': str(y), 'value': y} for y in sorted(df['year'].unique())],
            value=df['year'].max(),
            clearable=False
        )
    ], style={'width': '30%', 'margin': 'auto'}),

    dcc.Graph(id='sales_map')
])

# 6️⃣ Callback to update map
@app.callback(
    Output('sales_map', 'figure'),
    Input('year_dropdown', 'value')
)
def update_map(selected_year):
    # Filter data by year
    filtered_df = df[df['year'] == selected_year]

    # Group by state
    sales_by_state = filtered_df.groupby('state', as_index=False)['totaldue'].sum()

    # Create map
    fig = px.choropleth(
        sales_by_state,
        locations='state',
        locationmode='USA-states',
        color='totaldue',
        scope='usa',
        color_continuous_scale='Blues',
        title=f'Total Sales by State - {selected_year}'
    )

    fig.update_layout(
        geo=dict(bgcolor='rgba(0,0,0,0)'),
        margin={"r":0,"t":50,"l":0,"b":0}
    )
    return fig

# 7️⃣ Run the app
if __name__ == "__main__":
    app.run(debug=True)
In [14]:
df = pd.read_sql_query("""
Select Customers.state, Sum(Orders.totaldue) as total_sales from customers
join Orders on Orders.customerid = Customers.customerid
where Extract(year from CreationDate) = 2016
group by customers.state
""", engine
)

df.head()
Out[14]:
state total_sales
0 AL 454.07
1 AZ 292.89
2 CA 2359.89
3 CO 845.82
4 CT 107.40
In [ ]: